# -*- coding: utf-8 -*-
import random
import uuid
import scrapy
from scrapy import Request
from scrapy.utils.project import get_project_settings
from scrapy.exceptions import IgnoreRequest
from zc_core.util.http_util import retry_request
from agshop.rules import *
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.done_filter import DoneFilter
from zc_core.dao.sku_dao import SkuDao
from zc_core.spiders.base import BaseSpider


class FullSpider(BaseSpider):
    """Full-catalog item spider: fetches the detail page for every SKU of
    the current batch that passes the whitelist / offline-time / done filters.
    """
    name = 'full'
    custom_settings = {
        'CONCURRENT_REQUESTS': 12,
        'CONCURRENT_REQUESTS_PER_DOMAIN': 12,
        'CONCURRENT_REQUESTS_PER_IP': 12,
    }
    # Item-detail API endpoint; the placeholder is the SKU id.
    item_url = 'https://agshop.delinzl.cn/api/goodsItem/detail?id={}&isIntranet=false'

    def __init__(self, batchNo=None, *args, **kwargs):
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record for this run.
        BatchDao().create_batch(self.batch_no)
        # Filter used to skip SKUs already collected in this batch.
        self.done_filter = DoneFilter(self.batch_no)

    def start_requests(self):
        """Yield one detail request per target SKU of the current batch.

        Targets are SKUs whose catalog1Id is in CAT1_WHITE_LIST, that are not
        offline past MAX_OFFLINE_TIME, and that have not already been collected
        (unless FORCE_RECOVER is set, in which case done SKUs are re-collected).
        """
        settings = get_project_settings()
        pool_list = SkuDao().get_batch_sku_list(
            batch_no=self.batch_no,
            # 'offlineTime' must be projected, otherwise the MAX_OFFLINE_TIME
            # check below always sees the default 0 and never filters anything.
            fields={"_id": 1, "catalog3Id": 1, "soldCount": 1, "offlineTime": 1},
            query={
                'catalog1Id': {'$in': list(settings.get('CAT1_WHITE_LIST', {}).keys())}
            }
        )
        self.logger.info('全量：%s' % (len(pool_list)))
        force_recover = settings.get('FORCE_RECOVER', False)
        if force_recover:
            # FORCE_RECOVER: keep already-collected SKUs so they are re-fetched.
            dist_list = list(pool_list)
        else:
            dist_list = [x for x in pool_list if not self.done_filter.contains(x.get('_id'))]
        self.logger.info('目标：%s' % (len(dist_list)))
        # Shuffle so retries/restarts don't always hammer the same id range first.
        random.shuffle(dist_list)
        for sku in dist_list:
            sku_id = sku.get('_id')
            cat3_id = sku.get('catalog3Id')
            sold_count = sku.get('soldCount')
            offline_time = sku.get('offlineTime', 0)
            # Skip SKUs that have been offline longer than the allowed threshold.
            if offline_time > settings.get('MAX_OFFLINE_TIME', 2):
                self.logger.info('忽略: [%s][%s]', sku_id, offline_time)
                continue
            # Skip already-collected SKUs (no-op when FORCE_RECOVER is on).
            if self.done_filter.contains(sku_id) and not force_recover:
                self.logger.info('已采: [%s]', sku_id)
                continue
            # Request the item detail page.
            yield Request(
                url=self.item_url.format(sku_id),
                meta={
                    'reqType': 'item',
                    'batchNo': self.batch_no,
                    'skuId': sku_id,
                    'catalog3Id': cat3_id,
                    'soldCount': sold_count,
                },
                headers={
                    # Fresh request id per call, mimicking the site's frontend.
                    'Metis-ReqId': str(uuid.uuid4()),
                    'Metis-Tenant': '8dd10f97bc684b4fbc845c0b344883aa',
                    'Host': 'agshop.delinzl.cn',
                    'Connection': 'keep-alive',
                    'Accept': 'application/json, text/plain, */*',
                    'Metis-InternalModule': 'aaaa',
                    'Accept-Encoding': 'gzip, deflate, br',
                    'Accept-Language': 'zh-CN,zh;q=0.9',
                    'Origin': 'https://agshop.delinzl.cn',
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3870.400 QQBrowser/10.8.4405.400',
                    'Content-Type': 'application/json',
                    'Referer': f'https://agshop.delinzl.cn/detail?id={sku_id}',
                },
                callback=self.parse_item_data,
                errback=self.error_back,
            )

    def parse_item_data(self, response):
        """Parse an item-detail response and yield the resulting item, if any."""
        meta = response.meta
        sku_id = meta.get('skuId')
        # NOTE: this resolves to the module-level parse_item_data imported via
        # `from agshop.rules import *` (class scope is not consulted inside a
        # method body), not a recursive call to this method.
        item = parse_item_data(response)
        if item:
            self.logger.info('商品: [%s]' % sku_id)
            yield item
        else:
            # Item not parseable — treated as delisted/offline.
            self.logger.error('下架: sku=%s' % sku_id)